# The Partisan Ties of Lobbying firms
# Research & Politics
# Alexander C. Furnas, Michael T. Heaney, and Timothy M. LaPira
# August 28, 2019
# This code was developed in R version 3.6.1 (2019-07-05) and may not function as intended in other versions of R.

setwd("/Users/alexanderfurnas/Downloads/dataverse_files (1)")

# Install packages, if necessary

install.packages("tidyverse") # 1.2.1
install.packages("irr") # 0.84.1
install.packages("plm") # 2.0-1
install.packages("lmtest") # 0.9-37
install.packages("sandwich") # 2.5-1
install.packages("car") # 3.0-3
install.packages("Amelia") # 1.7.5
install.packages("weights") # 1.0
install.packages("devtools") # 2.1.0

# Open libraries

library(tidyverse)
library(irr)
library(plm)
library(lmtest)
library(sandwich)
library(car)
library(Amelia)
library(weights)
library(devtools)

# Installing KBAL using Github and opening its library

 install_github("https://github.com/chadhazlett/KBAL") # 0.0.0.9000 
# Be sure to disable any proxies in your internet settings, as they may prevent R from accessing Github.

library(KBAL)

# Set up workspace and read data

rm(list=ls())

firm_quarterly_fixed <- read.csv("firm_postHOLGAfixedpanel6.9.19.csv")
firm_panel_fixed <- pdata.frame(firm_quarterly_fixed, index = c("registrant", "year_q_int"))
firm_quarterly <- read.csv("firm_postHOLGAquarterlypanel6.6.19.csv")
firm_panel <- pdata.frame(firm_quarterly, index = c("registrant", "year_q"))
firmfixident <- read.csv("FixedFirmParties.csv")
firm_data_year <- read.csv("final_nonimputed_yearly.csv")
firm_data_year <- firm_data_year %>% left_join(firmfixident, by=c("registrant" = "Registrant"))
firm_data_year$website.dum[is.na(firm_data_year$website.dum)] <- 0
firm_data_year$total_contrib[is.na(firm_data_year$total_contrib)] <- 0
firm_data_year$Rcontribs[is.na(firm_data_year$Rcontribs)] <- 0
firm_contributions_yearly <- read.csv("TimeVaryingFirmParties.csv")
fixedfirmident <- read_csv("FixedFirmParties.csv")
lobbying <- read_delim("lob_lobbying.txt", delim = ",", quote = "|", col_names = F) %>% filter(X4 == "y", X13 == "y") %>% dplyr::select(X1, X3, X15)
names(lobbying) <- c("UniqID", "Registrant", "Year")
lobbyists <- read_delim("lob_lobbyist.txt", delim = ",", quote = "|", col_names = F) %>% dplyr::select(X1, X3, X4)
names(lobbyists) <- c("UniqID", "Lobbyist", "contributor_ext_id")
lobbying <- lobbying %>% left_join(lobbyists)
firm_lobbyists <- lobbying %>% dplyr::select( Registrant,Year,Lobbyist,contributor_ext_id) %>% distinct()
cf_indiv <- read.csv("lobbyist_yearly_contrib_totals.csv")

##### Fleiss's k 
set.seed(12345)
sink("Fleissk.txt")

firm_quarterly_fixed$firm_type95d <- 0
firm_quarterly_fixed$firm_type95d[firm_quarterly_fixed$Dfirm_CF95 == 1] <- 1
firm_quarterly_fixed$firm_type95d[firm_quarterly_fixed$Rfirm_CF95 == 1] <- 2
firm_quarterly_fixed$firm_type95d[firm_quarterly_fixed$Bfirm_CF95 == 1] <- 3

firm_quarterly_fixed$firm_type90d <- 0
firm_quarterly_fixed$firm_type90d[firm_quarterly_fixed$Dfirm_CF90 == 1] <- 1
firm_quarterly_fixed$firm_type90d[firm_quarterly_fixed$Rfirm_CF90 == 1] <- 2
firm_quarterly_fixed$firm_type90d[firm_quarterly_fixed$Bfirm_CF90 == 1] <- 3

firm_quarterly_fixed$firm_type85d <- 0
firm_quarterly_fixed$firm_type85d[firm_quarterly_fixed$Dfirm_CF85 == 1] <- 1
firm_quarterly_fixed$firm_type85d[firm_quarterly_fixed$Rfirm_CF85 == 1] <- 2
firm_quarterly_fixed$firm_type85d[firm_quarterly_fixed$Bfirm_CF85 == 1] <- 3

cat_overall_partisan_classification <- dplyr::select(firm_quarterly_fixed, registrant, firm_type85d, firm_type90d, firm_type95d) %>% unique() %>% select(-registrant)

kappam.fleiss(as.matrix(cat_overall_partisan_classification))

firm_quarterly$firm_type95d <- 0
firm_quarterly$firm_type95d[firm_quarterly$Dfirm_CF95 == 1] <- 1
firm_quarterly$firm_type95d[firm_quarterly$Rfirm_CF95 == 1] <- 2
firm_quarterly$firm_type95d[firm_quarterly$Bfirm_CF95 == 1] <- 3

firm_quarterly$firm_type90d <- 0
firm_quarterly$firm_type90d[firm_quarterly$Dfirm_CF90 == 1] <- 1
firm_quarterly$firm_type90d[firm_quarterly$Rfirm_CF90 == 1] <- 2
firm_quarterly$firm_type90d[firm_quarterly$Bfirm_CF90 == 1] <- 3

firm_quarterly$firm_type85d <- 0
firm_quarterly$firm_type85d[firm_quarterly$Dfirm_CF85 == 1] <- 1
firm_quarterly$firm_type85d[firm_quarterly$Rfirm_CF85 == 1] <- 2
firm_quarterly$firm_type85d[firm_quarterly$Bfirm_CF85 == 1] <- 3

cat_overall_partisan_classification <- dplyr::select(firm_quarterly, registrant, firm_type85d, firm_type90d, firm_type95d) %>% unique() %>% select(-registrant)

kappam.fleiss(as.matrix(cat_overall_partisan_classification))

cor.test(firm_quarterly_fixed$firm_type85d, firm_quarterly$firm_type85d)
cor.test(firm_quarterly_fixed$firm_type90d, firm_quarterly$firm_type90d)
cor.test(firm_quarterly_fixed$firm_type95d, firm_quarterly$firm_type95d)

sink() #"Fleissk.txt")

##### Validation Exercise
set.seed(12345)
sink("Validation.txt")

firm_quarterly$Dfound <- 0
firm_quarterly$Dfound[firm_quarterly$D.founders > 0] <- 1
firm_quarterly$Dfound[firm_quarterly$R.founders > 0] <- 0
firm_quarterly$Rfound <- 0
firm_quarterly$Rfound[firm_quarterly$R.founders > 0] <- 1
firm_quarterly$Rfound[firm_quarterly$D.founders > 0] <- 0
firm_quarterly$Demfd <- 0
firm_quarterly$Demfd[firm_quarterly$D.founders > 0] <- 1
firm_quarterly$Repfd <- 0
firm_quarterly$Repfd[firm_quarterly$R.founders > 0] <- 1
firm_quarterly$bipart <- 0
firm_quarterly$bipart <- firm_quarterly$Demfd + firm_quarterly$Repfd
firm_quarterly$Dfound[firm_quarterly$bipart == 0] <- NA
firm_quarterly$Rfound[firm_quarterly$bipart == 0] <- NA

cor.test(firm_quarterly$Dfirm_CF95, firm_quarterly$Dfound, na.act=na.rm)
cor.test(firm_quarterly$Dfirm_CF90, firm_quarterly$Dfound, na.act=na.rm)
cor.test(firm_quarterly$Dfirm_CF85, firm_quarterly$Dfound, na.act=na.rm)

cor.test(firm_quarterly$Rfirm_CF95, firm_quarterly$Rfound, na.act=na.rm)
cor.test(firm_quarterly$Rfirm_CF90, firm_quarterly$Rfound, na.act=na.rm)
cor.test(firm_quarterly$Rfirm_CF85, firm_quarterly$Rfound, na.act=na.rm)

firm_quarterly_fixed$Dfound <- 0
firm_quarterly_fixed$Dfound[firm_quarterly_fixed$D.founders > 0] <- 1
firm_quarterly_fixed$Dfound[firm_quarterly_fixed$R.founders > 0] <- 0
firm_quarterly_fixed$Rfound <- 0
firm_quarterly_fixed$Rfound[firm_quarterly_fixed$R.founders > 0] <- 1
firm_quarterly_fixed$Rfound[firm_quarterly_fixed$D.founders > 0] <- 0
firm_quarterly_fixed$Demfd <- 0
firm_quarterly_fixed$Demfd[firm_quarterly_fixed$D.founders > 0] <- 1
firm_quarterly_fixed$Repfd <- 0
firm_quarterly_fixed$Repfd[firm_quarterly_fixed$R.founders > 0] <- 1
firm_quarterly_fixed$bipart <- 0
firm_quarterly_fixed$bipart <- firm_quarterly_fixed$Demfd + firm_quarterly_fixed$Repfd
firm_quarterly_fixed$Dfound[firm_quarterly_fixed$bipart == 0] <- NA
firm_quarterly_fixed$Rfound[firm_quarterly_fixed$bipart == 0] <- NA

cor.test(firm_quarterly_fixed$Dfirm_CF95, firm_quarterly_fixed$Dfound, na.act=na.rm)
cor.test(firm_quarterly_fixed$Dfirm_CF90, firm_quarterly_fixed$Dfound, na.act=na.rm)
cor.test(firm_quarterly_fixed$Dfirm_CF85, firm_quarterly_fixed$Dfound, na.act=na.rm)

cor.test(firm_quarterly_fixed$Rfirm_CF95, firm_quarterly_fixed$Rfound, na.act=na.rm)
cor.test(firm_quarterly_fixed$Rfirm_CF90, firm_quarterly_fixed$Rfound, na.act=na.rm)
cor.test(firm_quarterly_fixed$Rfirm_CF85, firm_quarterly_fixed$Rfound, na.act=na.rm)

sink() #"Validation.txt")

##### Table 1.  Variable Definitions and Descriptive Statistics

sink("Table_1.txt")
set.seed(12345)
mean(firm_quarterly_fixed$rev.perlobbyist.2015)
sd(firm_quarterly_fixed$rev.perlobbyist.2015)
summary(firm_quarterly_fixed$rev.perlobbyist.2015)
missing_percent = (sum(is.na(firm_quarterly_fixed$rev.perlobbyist.2015)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(firm_quarterly_fixed$h.aligned_cf85)
sd(firm_quarterly_fixed$h.aligned_cf85)
summary(firm_quarterly_fixed$h.aligned_cf85)
missing_percent = (sum(is.na(firm_quarterly_fixed$h.aligned_cf85)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(firm_quarterly_fixed$h.aligned_cf)
sd(firm_quarterly_fixed$h.aligned_cf)
summary(firm_quarterly_fixed$h.aligned_cf)
missing_percent = (sum(is.na(firm_quarterly_fixed$h.aligned_cf)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(firm_quarterly_fixed$h.aligned_cf95)
sd(firm_quarterly_fixed$h.aligned_cf95)
summary(firm_quarterly_fixed$h.aligned_cf95)
missing_percent = (sum(is.na(firm_quarterly_fixed$h.aligned_cf95)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(firm_quarterly$h.aligned_cf85)
sd(firm_quarterly$h.aligned_cf85)
summary(firm_quarterly$h.aligned_cf85)
missing_percent = (sum(is.na(firm_quarterly$h.aligned_cf85)) / nrow(firm_quarterly)) * 100
missing_percent

mean(firm_quarterly$h.aligned_cf)
sd(firm_quarterly$h.aligned_cf)
summary(firm_quarterly$h.aligned_cf)
missing_percent = (sum(is.na(firm_quarterly$h.aligned_cf)) / nrow(firm_quarterly)) * 100
missing_percent

mean(firm_quarterly_fixed$h.aligned_cf95)
sd(firm_quarterly_fixed$h.aligned_cf95)
summary(firm_quarterly_fixed$h.aligned_cf95)
missing_percent = (sum(is.na(firm_quarterly$h.aligned_cf95)) / nrow(firm_quarterly)) * 100
missing_percent

mean(firm_quarterly_fixed$s.aligned_cf85)
sd(firm_quarterly_fixed$s.aligned_cf85)
summary(firm_quarterly_fixed$s.aligned_cf85)
missing_percent = (sum(is.na(firm_quarterly_fixed$s.aligned_cf85)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(firm_quarterly_fixed$s.aligned_cf)
sd(firm_quarterly_fixed$s.aligned_cf)
summary(firm_quarterly_fixed$s.aligned_cf)
missing_percent = (sum(is.na(firm_quarterly_fixed$s.aligned_cf)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(firm_quarterly_fixed$s.aligned_cf95)
sd(firm_quarterly_fixed$s.aligned_cf95)
summary(firm_quarterly_fixed$s.aligned_cf95)
missing_percent = (sum(is.na(firm_quarterly_fixed$s.aligned_cf)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(firm_quarterly$s.aligned_cf85)
sd(firm_quarterly$s.aligned_cf85)
summary(firm_quarterly$s.aligned_cf85)
missing_percent = (sum(is.na(firm_quarterly$s.aligned_cf85)) / nrow(firm_quarterly)) * 100
missing_percent

mean(firm_quarterly$s.aligned_cf)
sd(firm_quarterly$s.aligned_cf)
summary(firm_quarterly$s.aligned_cf)
missing_percent = (sum(is.na(firm_quarterly$s.aligned_cf)) / nrow(firm_quarterly)) * 100
missing_percent

mean(firm_quarterly$s.aligned_cf95)
sd(firm_quarterly$s.aligned_cf95)
summary(firm_quarterly$s.aligned_cf95)
missing_percent = (sum(is.na(firm_quarterly$s.aligned_cf95)) / nrow(firm_quarterly)) * 100
missing_percent

mean(firm_quarterly_fixed$num.clients)
sd(firm_quarterly_fixed$num.clients)
summary(firm_quarterly_fixed$num.clients)
missing_percent = (sum(is.na(firm_quarterly_fixed$num.clients)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(firm_quarterly_fixed$client.div.index)
sd(firm_quarterly_fixed$client.div.index)
summary(firm_quarterly_fixed$client.div.index)
missing_percent = (sum(is.na(firm_quarterly_fixed$client.div.index)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(subset(firm_quarterly_fixed$law.firm.dum, firm_quarterly_fixed$law.firm.dum>-1))
sd(subset(firm_quarterly_fixed$law.firm.dum, firm_quarterly_fixed$law.firm.dum>-1))
summary(firm_quarterly_fixed$law.firm.dum, firm_quarterly_fixed$law.firm.dum>-1)
missing_percent = (sum(is.na(firm_quarterly_fixed$law.firm.dum)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(subset(firm_quarterly_fixed$international.offices.dum, firm_quarterly_fixed$international.offices.dum>-1))
sd(subset(firm_quarterly_fixed$international.offices.dum, firm_quarterly_fixed$international.offices.dum>-1))
summary(firm_quarterly_fixed$international.offices.dum, firm_quarterly_fixed$international.offices.dum>-1)
missing_percent = (sum(is.na(firm_quarterly_fixed$international.offices.dum)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(subset(firm_quarterly_fixed$num.domestic.offices, firm_quarterly_fixed$num.domestic.offices>-1))
sd(subset(firm_quarterly_fixed$num.domestic.offices, firm_quarterly_fixed$num.domestic.offices>-1))
summary(subset(firm_quarterly_fixed$num.domestic.offices, firm_quarterly_fixed$num.domestic.offices>-1))
missing_percent = (sum(is.na(firm_quarterly_fixed$num.domestic.offices)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

mean(subset(firm_quarterly_fixed$age, firm_quarterly_fixed$age>-1))
sd(subset(firm_quarterly_fixed$age, firm_quarterly_fixed$age>-1))
summary(subset(firm_quarterly_fixed$age, firm_quarterly_fixed$age>-1))
missing_percent = (sum(is.na(firm_quarterly_fixed$age)) / nrow(firm_quarterly_fixed)) * 100
missing_percent

sink() #"Table_2.txt")

##### Figure 1.  Distribution of Partisan Giving by Lobbying Firms 

sink("Figure_1.txt")
set.seed(12345)

Rcontribs_aggregated <- aggregate(x = firm_quarterly$Rcontribs, by = list(unique.values = firm_quarterly$registrant), FUN = "mean")
total_contrib_aggregated <- aggregate(x = firm_quarterly$total_contrib, by = list(unique.values = firm_quarterly$registrant), FUN = "mean")
Rcontribs_agg <- subset(Rcontribs_aggregated$x, !is.na(Rcontribs_aggregated$x)&!is.na(total_contrib_aggregated$x))
total_contrib_agg <- subset(total_contrib_aggregated$x, !is.na(Rcontribs_aggregated$x)&!is.na(total_contrib_aggregated$x))
pct_repub_cont1 <- (Rcontribs_agg / total_contrib_agg) * 100

Rcontribs <- subset(firm_quarterly$Rcontribs, !is.na(firm_quarterly$Rcontribs)&!is.na(firm_quarterly$total_contrib))
total_contrib <- subset(firm_quarterly$total_contrib, !is.na(firm_quarterly$Rcontribs)&!is.na(firm_quarterly$total_contrib))
pct_repub_cont2 <- (Rcontribs / total_contrib) * 100

pct_repub_cont1_categories <- as.numeric(cut(subset(pct_repub_cont1, !is.na(pct_repub_cont1) & pct_repub_cont1 <= 100), breaks=c(-0.0000000000001,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100))) 
pct_repub_cont2_categories <- as.numeric(cut(subset(pct_repub_cont2, !is.na(pct_repub_cont2) & pct_repub_cont2 <= 100), breaks=c(-0.0000000000001,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100)))
cn1 <- nrow(as.matrix(subset(pct_repub_cont1, !is.na(pct_repub_cont1) & pct_repub_cont1 <= 100)))
cn2 <- nrow(as.matrix(subset(pct_repub_cont1, !is.na(pct_repub_cont2) & pct_repub_cont2 <= 100)))
vn1 <- tabulate(pct_repub_cont1_categories)
vn2 <- tabulate(pct_repub_cont2_categories)
Percentage <- c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100)
Time_Fixed <- (vn1 / cn1) * 100
Time_Varying <- (vn2 / cn2) * 100 
newdata <- as.data.frame(cbind(Percentage,Time_Fixed, Time_Varying))
newdata

##### Figure 1 Note.  Percent of Firms Not Included in Figure 1 because of No Campaign Giving

length(unique(firm_quarterly_fixed$registrant[is.na(firm_quarterly_fixed$D_contribs)]))/length(unique(firm_quarterly_fixed$registrant))  #

sink() #"Figure_1.txt")

##### Figure 2.  Revenue Trends Lobbying Firms, 2008-2016

firm_contributions_overall_indata <- firmfixident %>% filter(Registrant %in% firm_quarterly_fixed$registrant )

firms <- firm_quarterly_fixed %>% select(registrant,year_q, Rfirm_CF90, Bfirm_CF90, Dfirm_CF90, rev.perlobbyist.2015) %>% unique()
firms$firm_type <- "Unclassified"
firms$firm_type[firms$Bfirm_CF90 == 1] <- "Bipartisan"
firms$firm_type[firms$Rfirm_CF90 == 1] <- "Republican"
firms$firm_type[firms$Dfirm_CF90 == 1] <- "Democratic"

firms <- firms %>% group_by(firm_type, year_q) %>% summarise(med_rev = median(rev.perlobbyist.2015))

firm_timeseries <- ggplot(filter(firms, firm_type != "Other"), aes(x=year_q,y=med_rev, group=firm_type, linetype = as.factor(firm_type))) +geom_line() +theme_bw(base_size=14) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size=8)) +scale_y_continuous(labels = scales::dollar, limits = c(0, 80000)) + xlab("Year/Quarter") + ylab("Median revenue per lobbyist") + labs(linetype='Firm Type')
ggsave("firm_timeseries_postHLOGA.pdf",plot=firm_timeseries, device="pdf", width = 5, height = 4, units = "in")

##### Table 2.  Firm Revenue per Lobbyist -- Panel Linear Models with Firm Random Effects, Temporal Fixed, amd Multiple Imputation for Missing Data in Firm Attributes

sink("Table_2.txt")

# Model 1.1 Fixed-effects model with Multiple Imputation -- time-fixed partisan identities -- 85% threshold

set.seed(12345)

firm_quarterly_fixed_impute <- firm_quarterly_fixed %>% dplyr::select(rev.perlobbyist.2015,registrant,year,num.lobbyists,num.clients,law.firm.dum,
                                                                      international.offices.dum,num.domestic.offices,age,h.aligned,s.aligned,year_q_int,D_contribs,R_contribs,D_share,h.aligned_cf,s.aligned_cf,h.aligned_cf85,s.aligned_cf85,h.aligned_cf95,s.aligned_cf95,industry.diversity,issue.diversity, client.div.index)
col_nums <- c(6,7,8,9,13,14,15)
lowers <- c(0,0,1,0,0,0,0)
uppers <- c(1,1,83,180,4427780,3060261,1)

constraint_mat <-matrix(c(col_nums,lowers,uppers),nrow=length(col_nums))
firm_quarterly_fixed_impute$year_q_int <- as.numeric(as.character(firm_quarterly_fixed_impute$year_q_int))

a.out_q_fixed <- amelia(as.data.frame(firm_quarterly_fixed_impute), m=100,ts="year_q_int", cs="registrant", p2s = 2, idvars = c("client.div.index"), logs = c("num.lobbyists","num.clients","industry.diversity","issue.diversity","num.domestic.offices","D_contribs","R_contribs","age"), bounds = constraint_mat)

pdf('QuarterlyFixedPanel_Imputation.pdf')
plot(a.out_q_fixed, which.vars = c(6,7,8,9,13,14,15))
dev.off()
pdf('QuarterlyFixedPanel_Dispersion.pdf')
disperse(a.out_q_fixed, dims = 1, m = 100)
dev.off()
summary(a.out_q_fixed)

rm.amelia.out <- lapply(a.out_q_fixed$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf85 + s.aligned_cf85 + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices  + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")),  model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

lintests <- lapply(rm.amelia.out, function(i) linearHypothesis(i, "h.aligned_cf85 =  s.aligned_cf85",  white.adjust = "hc3", test = "F"))
lintestF <- mean(do.call(rbind, lapply(lintests, function(i) i$F))[,2])

lintestpval <- mean(do.call(rbind, lapply(lintests, function(i) i$`Pr(>F)`))[,2])

lintests[[1]]

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects <- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

# Model 1.2 Fixed-effects model with Multiple Imputation -- time-fixed partisan identities -- 90% threshold

rm.amelia.out <- lapply(a.out_q_fixed$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf + s.aligned_cf + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices  + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")),  model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

lintests <- lapply(rm.amelia.out, function(i) linearHypothesis(i, "h.aligned_cf =  s.aligned_cf",  white.adjust = "hc3", test = "F"))
lintestF <- mean(do.call(rbind, lapply(lintests, function(i) i$F))[,2])

lintestpval <- mean(do.call(rbind, lapply(lintests, function(i) i$`Pr(>F)`))[,2])

lintests[[1]]

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects <- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

# Model 1.3 Fixed-effects model with Multiple Imputation -- time-fixed partisan identities -- 95% threshold

rm.amelia.out <- lapply(a.out_q_fixed$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf95 + s.aligned_cf95 + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

lintests <- lapply(rm.amelia.out, function(i) linearHypothesis(i, "h.aligned_cf95 = s.aligned_cf95", white.adjust = "hc3", test = "F"))
lintestF <- mean(do.call(rbind, lapply(lintests, function(i) i$F))[,2])

lintestpval <- mean(do.call(rbind, lapply(lintests, function(i) i$`Pr(>F)`))[,2])

lintests[[1]]

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects <- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

# Model 1.4 Fixed-effects model with Multiple Imputation -- time-varying partisan identities -- 85% threshold

set.seed(12345)

firm_quarterly_impute <- firm_quarterly%>% dplyr::select(rev.perlobbyist.2015,registrant,year,num.lobbyists,num.clients,law.firm.dum,
                                                                      international.offices.dum,num.domestic.offices,age,h.aligned,s.aligned,year_q_int,D_contribs,R_contribs,D_share,h.aligned_cf,s.aligned_cf,h.aligned_cf85,s.aligned_cf85,h.aligned_cf95,s.aligned_cf95,industry.diversity,issue.diversity,client.div.index)
col_nums <- c(6,7,8,9,13,14,15)
lowers <- c(0,0,1,0,0,0,0)
uppers <- c(1,1,83,180,4427780,3060261,1)

constraint_mat <-matrix(c(col_nums,lowers, uppers),nrow=length(col_nums))
firm_quarterly_impute$year_q_int <- as.numeric(as.character(firm_quarterly_impute$year_q_int))
a.out_q_varying <- amelia(as.data.frame(firm_quarterly_impute), m=100,ts="year_q_int", cs="registrant", p2s = 2, idvars = c("client.div.index"), logs = c("num.lobbyists","num.clients","industry.diversity","issue.diversity","num.domestic.offices","D_contribs","R_contribs","age"), bounds = constraint_mat)

pdf('QuarterlyVaryingPanel_Imputation.pdf')
plot(a.out_q_varying, which.vars = c(6,7,8,9,13,14,15))
dev.off()
pdf('QuarterlyVaryingPanel_Dispersion.pdf')
disperse(a.out_q_varying, dims = 1, m = 100)
dev.off()
summary(a.out_q_varying)

rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf85 + s.aligned_cf85 + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

lintests <- lapply(rm.amelia.out, function(i) linearHypothesis(i, "h.aligned_cf85 = s.aligned_cf85", white.adjust = "hc3", test = "F"))
lintestF <- mean(do.call(rbind, lapply(lintests, function(i) i$F))[,2])

lintestpval <- mean(do.call(rbind, lapply(lintests, function(i) i$`Pr(>F)`))[,2])
lintests[[1]]

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

# Model 1.5 Fixed-effects model with Multiple Imputation -- time-varying partisan identities -- 90% threshold


rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf + s.aligned_cf + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

lintests <- lapply(rm.amelia.out, function(i) linearHypothesis(i, "h.aligned_cf = s.aligned_cf", white.adjust = "hc3", test = "F"))
lintestF <- mean(do.call(rbind, lapply(lintests, function(i) i$F))[,2])

lintestpval <- mean(do.call(rbind, lapply(lintests, function(i) i$`Pr(>F)`))[,2])
lintests[[1]]

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

# Model 1.6 Fixed-effects model with Multiple Imputation -- time-varying partisan identities -- 95% threshold

rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf95 + s.aligned_cf95 + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

lintests <- lapply(rm.amelia.out, function(i) linearHypothesis(i, "h.aligned_cf95 = s.aligned_cf95", white.adjust = "hc3", test = "F"))
lintestF <- mean(do.call(rbind, lapply(lintests, function(i) i$F))[,2])

lintestpval <- mean(do.call(rbind, lapply(lintests, function(i) i$`Pr(>F)`))[,2])
lintests[[1]]

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

sink() #"Table_2.txt")


##### Table 3.  Change in Revenue Per Lobbyist -- Panel Linear Models on First Differences (i.e., Change Y on Change X)
gc()
sink("Table_3.txt")
set.seed(12345)

# Model 2.1 First-differences model -- time-fixed partisan identities -- 85% threshold

fdmodel_85 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf85 + s.aligned_cf85 + num.clients + client.div.index, data = firm_panel_fixed, model="fd")
coeffd <- coeftest(fdmodel_85, vcovHC(fdmodel_85, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_85)
coeffd

lh2 <- linearHypothesis(fdmodel_85, "h.aligned_cf85 = s.aligned_cf85", white.adjust = "hc3", test = "F")
lh2

# Model 2.2 First-differences model -- time-fixed partisan identities -- 90% threshold

fdmodel_90 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf + s.aligned_cf + num.clients + client.div.index, data = firm_panel_fixed, model="fd")
coeffd <- coeftest(fdmodel_90, vcovHC(fdmodel_90, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_90)
coeffd

lh2 <- linearHypothesis(fdmodel_90, "h.aligned_cf = s.aligned_cf", white.adjust = "hc3", test = "F")
lh2

# Model 2.3 First-differences model -- time-fixed partisan identities -- 95% threshold

fdmodel_95 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf95 + s.aligned_cf95 + num.clients + client.div.index, data = firm_panel_fixed, model="fd")
coeffd <- coeftest(fdmodel_95, vcovHC(fdmodel_95, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_95)

coeffd
lh2 <- linearHypothesis(fdmodel_95, "h.aligned_cf95 = s.aligned_cf95",  white.adjust = "hc3", test = "F")
lh2

# Model 2.4 First-differences model -- time-varying partisan identities -- 85% threshold

fdmodel_85 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf85 + s.aligned_cf85 + num.clients + client.div.index, data = firm_panel, model="fd")
coeffd <- coeftest(fdmodel_85, vcovHC(fdmodel_85, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_85)
coeffd

lh2 <- linearHypothesis(fdmodel_85, "h.aligned_cf85 = s.aligned_cf85",  white.adjust = "hc3", test = "F")
lh2

# Model 2.5 First-differences model -- time-varying partisan identities -- 90% threshold

fdmodel_90 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf + s.aligned_cf + num.clients + client.div.index, data = firm_panel, model="fd")
coeffd <- coeftest(fdmodel_90, vcovHC(fdmodel_90, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_90)
coeffd

lh2 <- linearHypothesis(fdmodel_90, "h.aligned_cf = s.aligned_cf", white.adjust = "hc3", test = "F")
lh2

# Model 2.6 First-differences model -- time-varying partisan identities -- 95% threshold

fdmodel_95 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf95 + s.aligned_cf95 + num.clients + client.div.index, data = firm_panel, model="fd")
coeffd <- coeftest(fdmodel_95, vcovHC(fdmodel_95, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_95)
coeffd

lh2 <- linearHypothesis(fdmodel_95, "h.aligned_cf95 = s.aligned_cf95",  white.adjust = "hc3", test = "F")
lh2

sink() #"Table_3.txt")

##### Figure 3 and Figure 4.  Difference-in-Differences Analysis

sink("Figure_3_and_Figure_4.txt")

set.seed(12345)

firm_data_year$Dfirm_CF90 <- 0
firm_data_year$Dfirm_CF90[firm_data_year$D_share >=.9] <- 1
firm_data_year$Rfirm_CF90 <- 0
firm_data_year$Rfirm_CF90[firm_data_year$D_share <=.1] <- 1
firm_data_year$Bfirm_CF90 <- 0
firm_data_year$Bfirm_CF90[firm_data_year$D_share >.1 & firm_data_year$D_share <.9] <- 1

firm_data_year_impute <- firm_data_year%>% dplyr::select(rev.perlobbyist.2015,registrant,year,num.lobbyists,num.clients,law.firm.dum,
                                                         international.offices.dum,num.domestic.offices,age,D_contribs,R_contribs, D_share, client.div.index, Dfirm_CF90, Rfirm_CF90, Bfirm_CF90)

col_nums <- c(6,7,8,9,10,11,12)
lowers <- c(0,0,1,0,0,0,0)
uppers <- c(1,1,83,180,4427780,3060261,1)

firm_data_year_impute$rev_total <- firm_data_year_impute$rev.perlobbyist.2015*firm_data_year_impute$num.lobbyists

filter(firm_data_year, firm_data_year_impute$rev_total > 371328)

constraint_mat <-matrix(c(col_nums,lowers, uppers),nrow=length(col_nums))

firm_data_year_impute$year <- as.numeric(firm_data_year_impute$year)
firm_data_year_impute$D_contribs[is.na(firm_data_year_impute$D_contribs)] <- 0
firm_data_year_impute$R_contribs[is.na(firm_data_year_impute$R_contribs)] <- 0

firm_data_year_impute$total_contribs <- firm_data_year_impute$D_contribs + firm_data_year_impute$R_contribs

a.out_y <- amelia(as.data.frame(firm_data_year_impute), m=100, ts="year", cs="registrant", p2s = 2, logs = c("rev.perlobbyist.2015","num.lobbyists", "num.clients", "client.div.index", "num.domestic.offices", "D_contribs","R_contribs", "rev_total", "total_contribs"), bounds = constraint_mat)

plot(a.out_y, which.vars = c(6,7,8,9,12))
summary(a.out_y)

disperse(a.out_y, dims = 1, m = 100)

# Conduct the diff and diff for 2010 - 2011

a.out_2010 <- lapply(a.out_y$imputations, function(i) filter(i, year == 2010, Rfirm_CF90 == 1 | Dfirm_CF90 == 1))
a.out_2011 <- lapply(a.out_y$imputations, function(i) filter(i, year == 2011, Rfirm_CF90 == 1 | Dfirm_CF90 == 1))

a.out_2010_X <- lapply(a.out_2010, function(i) apply(as.matrix(i)[,c("rev.perlobbyist.2015","num.lobbyists","num.clients", "law.firm.dum", "international.offices.dum","num.domestic.offices" , "age","client.div.index",             
                                                                     "rev_total")], 2, as.numeric))

out_2010_D <- as.numeric(a.out_2010$imp1$Rfirm_CF90)
a.kdbal_2010 <- lapply(a.out_2010_X, function(i) kbal(i, out_2010_D))

a.out_diff <- vector("list", 100) 
a.L1_orig2010 <- vector("list", 100) 
a.L1_kbal2010 <- vector("list", 100) 

for (i in 1:100){
  a.L1_orig2010[i] <- a.kdbal_2010[[i]]$L1_orig
  a.L1_kbal2010[i] <- a.kdbal_2010[[i]]$L1_kbal
  
  w <- a.kdbal_2010[[i]]$w
  partisan_10 <-a.out_2010[[i]]%>%
    cbind(w)
  
  partisan_11 <- a.out_2010[[i]]%>%
    dplyr::select(registrant) %>%
    left_join(a.out_2011[[i]])
  
  partisan_11$year[is.na(partisan_11$year)] <- 2011
  partisan_11$rev.perlobbyist.2015[is.na(partisan_11$rev.perlobbyist.2015)] <- 0 
  
  dat_diff <- partisan_10 %>%
    left_join(dplyr::select(partisan_11, registrant, rev.perlobbyist.2015), by = "registrant") %>%
    mutate(rev.perlobbyist.y = replace(rev.perlobbyist.2015.y, is.na(rev.perlobbyist.2015.y), 0), rpl.diff = rev.perlobbyist.2015.y - rev.perlobbyist.2015.x)
  a.out_diff[[i]] <- dat_diff
  
}
(L1_orig2010 <- mean(unlist(a.L1_orig2010)))
(L1_kbal2010 <- mean(unlist(a.L1_kbal2010)))

a.wtt <- lapply(a.out_diff, function(i) wtd.t.test(filter(i, Rfirm_CF90==1)$rpl.diff, filter(i, Rfirm_CF90==0)$rpl.diff,  weight=filter(i, Rfirm_CF90==1)$w, weighty=filter(i, Rfirm_CF90==0)$w, samedata=FALSE, bootse = TRUE, bootn=1000))

a.wtt.diff <-  do.call(rbind, lapply(a.wtt, function(i) i$additional[1]))
a.wtt.meanR <-  do.call(rbind, lapply(a.wtt, function(i) i$additional[2]))
a.wtt.meanD <-  do.call(rbind, lapply(a.wtt, function(i) i$additional[3]))
a.wtt.se <-  do.call(rbind, lapply(a.wtt, function(i) i$additional[4]))

diff_mi <- mi.meld(a.wtt.diff, a.wtt.se)

meanR_mi <- mi.meld(a.wtt.meanR, a.wtt.se)
meanD_mi <- mi.meld(a.wtt.meanD, a.wtt.se)

means_mi <- c(meanR_mi[[1]], meanD_mi[[1]])
uppers_mi <- unname(c(meanR_mi[[1]] + meanR_mi[[2]], meanD_mi[[1]] + meanD_mi[[2]]))
lowers_mi <- unname(c(meanR_mi[[1]] - meanR_mi[[2]], meanD_mi[[1]] - meanD_mi[[2]]))
dm_dat10 <- data.frame(vars = c("Republican", "Democratic"),means_mi, lowers_mi, uppers_mi)
g10 <- ggplot(dm_dat10, aes(x=vars,   y=means_mi))  + geom_errorbar(aes(ymin=lowers_mi, ymax=uppers_mi), colour="black", width=.1)+ geom_point(size=4) + theme_bw(base_size = 14) + xlab("") + ylab("Change in revenue per lobbyist \n from 2010 and 2011")  + scale_y_continuous(labels = scales::dollar)   
ggsave("DiffnDiff2010_2011.pdf", g10, device="pdf", width = 5, height = 4, units = "in")

a.out_dat <- vector("list", 100) 
for(i in 1:100){
  w <- a.kdbal_2010[[i]]$w
  partisan_10 <-a.out_2010[[i]]%>%
    select(registrant, Dfirm_CF90, Rfirm_CF90) %>%
    cbind(w)
  
  out_dat <- expand.grid(registrant = partisan_10$registrant, year = c(2008,2009,2010,2011,2012,2013,2014,2015))  %>%
    left_join(partisan_10) %>% 
    left_join(a.out_y$imputations[[i]])
  out_dat$rev.perlobbyist.2015[is.na(out_dat$rev.perlobbyist.2015)]<- 0
  a.out_dat[[i]] <- out_dat
  
}

wmean <- do.call(rbind, lapply(a.out_dat, function(i) i %>% group_by(Rfirm_CF90, year) %>% summarise(wmean = weighted.mean(rev.perlobbyist.2015, w))))
wmean <- wmean %>% group_by(Rfirm_CF90, year) %>% summarise(mean_rpl = mean(wmean))
wmean$Rfirm_CF90[wmean$Rfirm_CF90==1] <- "Republican"
wmean$Rfirm_CF90[wmean$Rfirm_CF90==0] <- "Democratic"
pt10 <- ggplot(wmean, aes(x=year, y=mean_rpl, group=as.factor(Rfirm_CF90), linetype=as.factor(Rfirm_CF90))) +geom_line() +theme_bw(base_size=14) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size=8)) +scale_y_continuous(labels = scales::dollar, limits = c(0,280000)) + xlab("Year") + ylab("Mean revenue per lobbyist") + labs(linetype='Firm Type') 
ggsave("paralell_trends10.pdf", pt10, device="pdf", width = 5, height = 4, units = "in")

# Difference in differences for 2013-2014

a.out_2013 <- lapply(a.out_y$imputations, function(i) filter(i, year == 2013, Rfirm_CF90 == 1 | Dfirm_CF90 == 1))
a.out_2014 <- lapply(a.out_y$imputations, function(i) filter(i, year == 2014, Rfirm_CF90 == 1 | Dfirm_CF90 == 1))

a.out_2013_X <- lapply(a.out_2013, function(i)  apply(as.matrix(i)[,c("rev.perlobbyist.2015","num.lobbyists","num.clients", "law.firm.dum", "international.offices.dum","num.domestic.offices", "age", "client.div.index", "rev_total")], 2, as.numeric))

out_2013_D <- as.numeric(a.out_2013$imp1$Rfirm_CF90)
a.kdbal_2013 <- lapply(a.out_2013_X, function(i) kbal(i, out_2013_D))

a.out_diff13 <- vector("list", 100) 
a.L1_orig2013 <- vector("list", 100) 
a.L1_kbal2013 <- vector("list", 100) 

for (i in 1: 100){
  a.L1_orig2013[i] <- a.kdbal_2013[[i]]$L1_orig
  a.L1_kbal2013[i] <- a.kdbal_2013[[i]]$L1_kbal
  
  w <- a.kdbal_2013[[i]]$w
  partisan_13 <-a.out_2013[[i]]%>%
    cbind(w)
  
  partisan_14 <- a.out_2013[[i]]%>%
    dplyr::select(registrant) %>%
    left_join(a.out_2014[[i]])
  
  partisan_14$year[is.na(partisan_14$year)] <- 2014
  partisan_14$rev.perlobbyist.2015[is.na(partisan_14$rev.perlobbyist.2015)] <- 0 
  
  dat_diff <- partisan_13 %>%
    left_join(dplyr::select(partisan_14, registrant, rev.perlobbyist.2015), by = "registrant") %>%
    mutate(rev.perlobbyist.y = replace(rev.perlobbyist.2015.y, is.na(rev.perlobbyist.2015.y), 0), rpl.diff = rev.perlobbyist.2015.y - rev.perlobbyist.2015.x)
  
  a.out_diff13[[i]] <- dat_diff
  
}

(L1_orig2013 <- mean(unlist(a.L1_orig2013)))
(L1_kbal2013 <- mean(unlist(a.L1_kbal2013)))

a.wtt13 <- lapply(a.out_diff13, function(i) wtd.t.test(filter(i, Rfirm_CF90==1)$rpl.diff, filter(i, Rfirm_CF90==0)$rpl.diff,  weight=filter(i, Rfirm_CF90==1)$w, weighty=filter(i, Rfirm_CF90==0)$w, samedata=FALSE, bootse = TRUE, bootn=1000))

a.wtt.diff13 <-  do.call(rbind, lapply(a.wtt13, function(i) i$additional[1]))
a.wtt.meanR13 <-  do.call(rbind, lapply(a.wtt13, function(i) i$additional[2]))
a.wtt.meanD13 <-  do.call(rbind, lapply(a.wtt13, function(i) i$additional[3]))
a.wtt.se13 <-  do.call(rbind, lapply(a.wtt13, function(i) i$additional[4]))

diff_mi13 <- mi.meld(a.wtt.diff13, a.wtt.se13)

meanR_mi13 <- mi.meld(a.wtt.meanR13, a.wtt.se13)
meanD_mi13 <- mi.meld(a.wtt.meanD13, a.wtt.se13)

means_mi <- c(meanR_mi13[[1]], meanD_mi13[[1]])
uppers_mi <- unname(c(meanR_mi13[[1]] + meanR_mi13[[2]], meanD_mi13[[1]] + meanD_mi13[[2]]))
lowers_mi <- unname(c(meanR_mi13[[1]] - meanR_mi13[[2]], meanD_mi13[[1]] - meanD_mi13[[2]]))
dm_dat13 <- data.frame(vars = c("Republican", "Democratic"),means_mi, lowers_mi, uppers_mi)
g13 <- ggplot(dm_dat13, aes(x=vars,   y=means_mi))  + geom_errorbar(aes(ymin=lowers_mi, ymax=uppers_mi), colour="black", width=.1)+ geom_point(size=4) + theme_bw(base_size = 14) + xlab("") + ylab("Change in revenue per lobbyist \n from 2013 and 2014") + scale_y_continuous(labels = scales::dollar)   
ggsave("DiffnDiff2013_2014.pdf", g13, device="pdf", width = 5, height = 4, units = "in")

a.out_dat13 <- vector("list", 100) 
for(i in 1: 100){
  w <- a.kdbal_2013[[i]]$w
  partisan_13 <-a.out_2013[[i]]%>%
    select(registrant, Dfirm_CF90, Rfirm_CF90) %>%
    cbind(w)
  
  out_dat <- expand.grid(registrant = partisan_13$registrant, year = c(2008,2009,2010,2011,2012,2013,2014,2015))  %>%
    left_join(partisan_13) %>% 
    left_join(a.out_y$imputations[[i]])
  out_dat$rev.perlobbyist.2015[is.na(out_dat$rev.perlobbyist.2015)]<- 0
  a.out_dat13[[i]] <- out_dat
  
}

wmean <- do.call(rbind, lapply(a.out_dat13, function(i) i %>% group_by(Rfirm_CF90, year) %>% summarise(wmean = weighted.mean(rev.perlobbyist.2015, w))))
wmean <- wmean %>% group_by(Rfirm_CF90, year) %>% summarise(mean_rpl = mean(wmean))
wmean$Rfirm_CF90[wmean$Rfirm_CF90==1] <- "Republican"
wmean$Rfirm_CF90[wmean$Rfirm_CF90==0] <- "Democratic"
pt13 <- ggplot(wmean, aes(x=year, y=mean_rpl, group=as.factor(Rfirm_CF90), linetype=as.factor(Rfirm_CF90))) +geom_line() +theme_bw(base_size=14) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size=8)) +scale_y_continuous(labels = scales::dollar, limits = c(0,280000)) + xlab("Year") + ylab("Mean revenue per lobbyist") + labs(linetype='Firm Type') 
ggsave("paralell_trends13.pdf", pt13, device="pdf", width = 5, height = 4, units = "in")

# Conduct the diff and diff for 2014 - 2015

a.out_2014 <- lapply(a.out_y$imputations, function(i) filter(i, year == 2014, Rfirm_CF90 == 1 | Dfirm_CF90 == 1))
a.out_2015 <- lapply(a.out_y$imputations, function(i) filter(i, year == 2015, Rfirm_CF90 == 1 | Dfirm_CF90 == 1))

a.out_2014_X <- lapply(a.out_2014, function(i)  apply(as.matrix(i)[,c("rev.perlobbyist.2015","num.lobbyists","num.clients", "law.firm.dum", "international.offices.dum","num.domestic.offices" , "age","client.div.index", "rev_total")], 2, as.numeric))

out_2014_D <- as.numeric(a.out_2014$imp1$Rfirm_CF90)
a.kdbal_2014 <- lapply(a.out_2014_X, function(i) kbal(i, out_2014_D))

a.out_diff14 <- vector("list", 100) 
a.L1_orig2014 <- vector("list", 100) 
a.L1_kbal2014 <- vector("list", 100) 

for (i in 1: 100){
  a.L1_orig2014[i] <- a.kdbal_2014[[i]]$L1_orig
  a.L1_kbal2014[i] <- a.kdbal_2014[[i]]$L1_kbal
  w <- a.kdbal_2014[[i]]$w
  partisan_14 <-a.out_2014[[i]]%>%
    cbind(w)
  
  partisan_15 <- a.out_2014[[i]]%>%
    dplyr::select(registrant) %>%
    left_join(a.out_2015[[i]])
  
  partisan_15$year[is.na(partisan_15$year)] <- 2015
  partisan_15$rev.perlobbyist.2015[is.na(partisan_15$rev.perlobbyist.2015)] <- 0 
  
  dat_diff <- partisan_14 %>%
    left_join(dplyr::select(partisan_15, registrant, rev.perlobbyist.2015), by = "registrant") %>%
    mutate(rev.perlobbyist.y = replace(rev.perlobbyist.2015.y, is.na(rev.perlobbyist.2015.y), 0), rpl.diff = rev.perlobbyist.2015.y - rev.perlobbyist.2015.x)
  a.out_diff14[[i]] <- dat_diff
  
}
(L1_orig2014 <- mean(unlist(a.L1_orig2014)))
(L1_kbal2014 <- mean(unlist(a.L1_kbal2014)))

a.wtt14 <- lapply(a.out_diff14, function(i) wtd.t.test(filter(i, Rfirm_CF90==1)$rpl.diff, filter(i, Rfirm_CF90==0)$rpl.diff,  weight=filter(i, Rfirm_CF90==1)$w, weighty=filter(i, Rfirm_CF90==0)$w, samedata=FALSE, bootse = TRUE, bootn=1000))

a.wtt14.diff <-  do.call(rbind, lapply(a.wtt14, function(i) i$additional[1]))
a.wtt14.meanR <-  do.call(rbind, lapply(a.wtt14, function(i) i$additional[2]))
a.wtt14.meanD <-  do.call(rbind, lapply(a.wtt14, function(i) i$additional[3]))
a.wtt14.se <-  do.call(rbind, lapply(a.wtt14, function(i) i$additional[4]))

diff_mi14 <- mi.meld(a.wtt14.diff, a.wtt14.se)

meanR_mi14 <- mi.meld(a.wtt14.meanR, a.wtt14.se)
meanD_mi14 <- mi.meld(a.wtt14.meanD, a.wtt14.se)

means_mi <- c(meanR_mi14[[1]], meanD_mi14[[1]])
uppers_mi <- unname(c(meanR_mi14[[1]] + meanR_mi14[[2]], meanD_mi14[[1]] + meanD_mi14[[2]]))
lowers_mi <- unname(c(meanR_mi14[[1]] - meanR_mi14[[2]], meanD_mi14[[1]] - meanD_mi14[[2]]))
dm_dat14 <- data.frame(vars = c("Republican", "Democratic"),means_mi, lowers_mi, uppers_mi)
g14 <- ggplot(dm_dat14, aes(x=vars,   y=means_mi))  + geom_errorbar(aes(ymin=lowers_mi, ymax=uppers_mi), colour="black", width=.1)+ geom_point(size=4) + theme_bw(base_size = 14) + xlab("") + ylab("Change in revenue per lobbyist \n from 2014 and 2015")  + scale_y_continuous(labels = scales::dollar)   
ggsave("DiffnDiff2014_2015.pdf", g14, device="pdf", width = 5, height = 4, units = "in")

a.out_dat14 <- vector("list", 100) 
for(i in 1: 100){
  w <- a.kdbal_2014[[i]]$w
  partisan_14 <-a.out_2014[[i]]%>%
    select(registrant, Dfirm_CF90, Rfirm_CF90) %>%
    cbind(w)
  
  out_dat <- expand.grid(registrant = partisan_14$registrant, year = c(2008,2009,2010,2011,2012,2013,2014,2015))  %>%
    left_join(partisan_14) %>% 
    left_join(a.out_y$imputations[[i]])
  out_dat$rev.perlobbyist.2015[is.na(out_dat$rev.perlobbyist.2015)]<- 0
  a.out_dat14[[i]] <- out_dat
  
}

wmean <- do.call(rbind, lapply(a.out_dat14, function(i) i %>% group_by(Rfirm_CF90, year) %>% summarise(wmean = weighted.mean(rev.perlobbyist.2015, w))))
wmean <- wmean %>% group_by(Rfirm_CF90, year) %>% summarise(mean_rpl = mean(wmean))
wmean$Rfirm_CF90[wmean$Rfirm_CF90==1] <- "Republican"
wmean$Rfirm_CF90[wmean$Rfirm_CF90==0] <- "Democratic"
pt14 <- ggplot(wmean, aes(x=year, y=mean_rpl, group=as.factor(Rfirm_CF90), linetype=as.factor(Rfirm_CF90))) +geom_line() +theme_bw(base_size=14) + theme(axis.text.x = element_text(angle = 90, hjust = 1, size=8)) +scale_y_continuous(labels = scales::dollar, limits = c(0,295000)) + xlab("Year") + ylab("Mean revenue per lobbyist") + labs(linetype='Firm Type') 
ggsave("paralell_trends14.pdf", pt14, device="pdf", width = 5, height = 4, units = "in")

# Conduct diff and diff for 2009 - 2010 as a placebo

a.out_2009 <- lapply(a.out_y$imputations, function(i) filter(i, year == 2009, Rfirm_CF90 == 1 | Dfirm_CF90 == 1))
a.out_2010 <- lapply(a.out_y$imputations, function(i) filter(i, year == 2010, Rfirm_CF90 == 1 | Dfirm_CF90 == 1))

a.out_2009_X <- lapply(a.out_2009, function(i)  apply(as.matrix(i)[,c("rev.perlobbyist.2015","num.lobbyists","num.clients", "law.firm.dum", "international.offices.dum","num.domestic.offices" , "age","client.div.index","rev_total")], 2, as.numeric))

out_2009_D <- as.numeric(a.out_2009$imp1$Rfirm_CF90)
a.kdbal_2009 <- lapply(a.out_2009_X, function(i) kbal(i, out_2009_D))

a.out_diff09 <- vector("list", 100) 
a.L1_orig2009 <- vector("list", 100) 
a.L1_kbal2009 <- vector("list", 100) 

for (i in 1: 100){
  a.L1_orig2009[i] <- a.kdbal_2009[[i]]$L1_orig
  a.L1_kbal2009[i] <- a.kdbal_2009[[i]]$L1_kbal
  
  w <- a.kdbal_2009[[i]]$w
  partisan_09 <-a.out_2009[[i]]%>%
    cbind(w)
  
  partisan_10 <- a.out_2009[[i]]%>%
    dplyr::select(registrant) %>%
    left_join(a.out_2010[[i]])
  
  partisan_10$year[is.na(partisan_10$year)] <- 2010
  partisan_10$rev.perlobbyist.2015[is.na(partisan_10$rev.perlobbyist.2015)] <- 0 
  
  dat_diff <- partisan_09 %>%
    left_join(dplyr::select(partisan_10, registrant, rev.perlobbyist.2015), by = "registrant") %>%
    mutate(rev.perlobbyist.y = replace(rev.perlobbyist.2015.y, is.na(rev.perlobbyist.2015.y), 0), rpl.diff = rev.perlobbyist.2015.y - rev.perlobbyist.2015.x)
  a.out_diff09[[i]] <- dat_diff
  
}
(L1_orig2009 <- mean(unlist(a.L1_orig2009)))
(L1_kbal2009 <- mean(unlist(a.L1_kbal2009)))

a.wtt09 <- lapply(a.out_diff09, function(i) wtd.t.test(filter(i, Rfirm_CF90==1)$rpl.diff, filter(i, Rfirm_CF90==0)$rpl.diff,  weight=filter(i, Rfirm_CF90==1)$w, weighty=filter(i, Rfirm_CF90==0)$w, samedata=FALSE, bootse = TRUE, bootn=1000))

a.wtt09.diff <-  do.call(rbind, lapply(a.wtt09, function(i) i$additional[1]))
a.wtt09.meanR <-  do.call(rbind, lapply(a.wtt09, function(i) i$additional[2]))
a.wtt09.meanD <-  do.call(rbind, lapply(a.wtt09, function(i) i$additional[3]))
a.wtt09.se <-  do.call(rbind, lapply(a.wtt09, function(i) i$additional[4]))

diff_mi09 <- mi.meld(a.wtt09.diff, a.wtt09.se)

meanR_mi09 <- mi.meld(a.wtt09.meanR, a.wtt09.se)
meanD_mi09 <- mi.meld(a.wtt09.meanD, a.wtt09.se)

means_mi <- c(meanR_mi09[[1]], meanD_mi09[[1]])
uppers_mi <- unname(c(meanR_mi09[[1]] + meanR_mi09[[2]], meanD_mi09[[1]] + meanD_mi09[[2]]))
lowers_mi <- unname(c(meanR_mi09[[1]] - meanR_mi09[[2]], meanD_mi09[[1]] - meanD_mi09[[2]]))
dm_dat09 <- data.frame(vars = c("Republican", "Democratic"),means_mi, lowers_mi, uppers_mi)
g09 <- ggplot(dm_dat09, aes(x=vars,   y=means_mi))  + geom_errorbar(aes(ymin=lowers_mi, ymax=uppers_mi), colour="black", width=.1)+ geom_point(size=4) + theme_bw(base_size = 14) + xlab("") + ylab("Change in revenue per lobbyist \n from 2009 and 2010") + scale_y_continuous(labels = scales::dollar)   
ggsave("DiffnDiff2009_2010.pdf", g09, device="pdf", width = 5, height = 4, units = "in")

a.out_dat09 <- vector("list", 100) 
for(i in 1: 100){
  w <- a.kdbal_2009[[i]]$w
  partisan_09 <-a.out_2009[[i]]%>%
    select(registrant, Dfirm_CF90, Rfirm_CF90) %>%
    cbind(w)
  
  out_dat <- expand.grid(registrant = partisan_09$registrant, year = c(2008, 2009,2010,2011,2012,2013,2014,2015))  %>%
    left_join(partisan_09) %>% 
    left_join(a.out_y$imputations[[i]])
  out_dat$rev.perlobbyist.2015[is.na(out_dat$rev.perlobbyist.2015)]<- 0
  a.out_dat09[[i]] <- out_dat
  
}

wmean <- do.call(rbind, lapply(a.out_dat09, function(i) i %>% group_by(Rfirm_CF90, year) %>% summarise(wmean = weighted.mean(rev.perlobbyist.2015, w))))
wmean <- wmean %>% group_by(Rfirm_CF90, year) %>% summarise(mean_rpl = mean(wmean))
wmean$Rfirm_CF90[wmean$Rfirm_CF90==1] <- "Republican"
wmean$Rfirm_CF90[wmean$Rfirm_CF90==0] <- "Democratic"
pt09 <- ggplot(wmean, aes(x=year, y=mean_rpl, group=as.factor(Rfirm_CF90), linetype=as.factor(Rfirm_CF90))) +geom_line() +theme_bw(base_size=14) + 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size=8), plot.title = element_text(size = 18,hjust = 0)) +scale_y_continuous(labels = scales::dollar, limits = c(0,280000)) + xlab("Year") + ylab("Mean revenue per lobbyist") + labs(linetype='Firm Type') 
ggsave("paralell_trends09.pdf", pt09, device="pdf",  width = 5, height = 4, units = "in")

balance <- cbind( c(L1_orig2009, L1_kbal2009), c(L1_orig2010, L1_kbal2010),c(L1_orig2013, L1_kbal2013), c(L1_orig2014, L1_kbal2014))
balance

rownames(balance) <- c("preweight L1", "postweight L1")
colnames(balance) <- c( "2009", "2010", "2013","2014")

difftable <- cbind(c(diff_mi09$q.mi, diff_mi09$se.mi), c(diff_mi$q.mi, diff_mi$se.mi),c(diff_mi13$q.mi, diff_mi13$se.mi), c(diff_mi14$q.mi, diff_mi14$se.mi))
rownames(difftable) <- c("Difference in mean revenue per lobbyist", "Standard error")
colnames(difftable) <- c( "2009-2010", "2010-2011","2013-2014", "2014-2015")
difftable

sink() #"Figure_3_and_Figure_4.txt")

##### Appendix A.  Imputation Procedures and Diagnostics


# See code above under Model 1.2 and Model 1.5.


##### Appendix B.  Fixed-Effects Models

set.seed(12345)
# Model 3.1 Fixed-effects model -- time-fixed partisan identities -- 85% threshold

femodel_fixed_85 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf85 + s.aligned_cf85 + num.clients + client.div.index, data = firm_panel_fixed, model="within", effect="twoways")
coeffe_fixed_85 <- coeftest(femodel_fixed_85, vcovHC(femodel_fixed_85, type="HC3",method="arellano", cluster="group"))
summary(femodel_fixed_85)
coeffe_fixed_85

lh1 <- linearHypothesis(femodel_fixed_85, "h.aligned_cf85 = s.aligned_cf85",  white.adjust = "hc3", test = "F")
lh1

# Model 3.2 Fixed-effects model -- time-fixed partisan identities -- 90% threshold

femodel_fixed_90 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf + s.aligned_cf + num.clients + client.div.index, data = firm_panel_fixed, model="within", effect="twoways")
coeffe_fixed_90 <- coeftest(femodel_fixed_90, vcovHC(femodel_fixed_90, type="HC3",method="arellano", cluster="group"))
summary(femodel_fixed_90)
coeffe_fixed_90

lh1 <- linearHypothesis(femodel_fixed_90, "h.aligned_cf = s.aligned_cf",  white.adjust = "hc3", test = "F")
lh1

# Model 3.3 Fixed-effects model -- time-fixed partisan identities -- 95% threshold

femodel_fixed_95 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf95 + s.aligned_cf95 + num.clients + client.div.index, data = firm_panel_fixed, model="within", effect="twoways")
coeffe_fixed_95 <- coeftest(femodel_fixed_95, vcovHC(femodel_fixed_95, type="HC3",method="arellano", cluster="group"))
summary(femodel_fixed_95)
coeffe_fixed_95

lh1 <- linearHypothesis(femodel_fixed_95, "h.aligned_cf95 = s.aligned_cf95",  white.adjust = "hc3", test = "F")
lh1

# Model 3.4 Fixed-effects model -- time-varying partisan identities -- 85% threshold

femodel_85 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf85 + s.aligned_cf85 + num.clients + client.div.index, data = firm_panel, model="within", effect="twoways")
coeffe_85 <- coeftest(femodel_85, vcovHC(femodel_85, type="HC3",method="arellano", cluster="group"))
summary(femodel_85)
coeffe_85

lh1 <- linearHypothesis(femodel_85, "h.aligned_cf85 = s.aligned_cf85", white.adjust = "hc3", test = "F")
lh1

# Model 3.5 Fixed-effects model -- time-varying partisan identities -- 90% threshold

femodel_90 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf + s.aligned_cf + num.clients + client.div.index, data = firm_panel, model="within", effect="twoways")
coeffe_90 <- coeftest(femodel_90, vcovHC(femodel_90, type="HC3",method="arellano", cluster="group"))
summary(femodel_90)
coeffe_90

lh1 <- linearHypothesis(femodel_90, "h.aligned_cf = s.aligned_cf", white.adjust = "hc3", test = "F")
lh1

# Model 3.6 Fixed-effects model -- time-varying partisan identities -- 95% threshold

femodel_95 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf95 + s.aligned_cf95 + num.clients + client.div.index, data = firm_panel,  model="within", effect="twoways")
coeffe_95 <- coeftest(femodel_95, vcovHC(femodel_95, type="HC3",method="arellano", cluster="group"))
summary(femodel_95)
coeffe_95

lh1 <- linearHypothesis(femodel_95, "h.aligned_cf95 = s.aligned_cf95", white.adjust = "hc3", test = "F")
lh1

sink()
##### Appendix C.  Variation in Model Specification to Evaluate the Possible Effects of Multicollinearity on the First Set of Models (1.2 and 1.5)

# Variations on Model 1.2 Fixed-effects model with Multiple Imputation -- time-fixed partisan identities -- 90% threshold
set.seed(12345)

rm.amelia.out <- lapply(a.out_q_fixed$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices  + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")),  model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects <- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])


rm.amelia.out <- lapply(a.out_q_fixed$imputations, function(i) plm(rev.perlobbyist.2015 ~ s.aligned_cf + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices  + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")),  model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects <- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

# Variations on Model 1.5 Fixed-effects model with Multiple Imputation -- time-varying partisan identities -- 90% threshold


rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])


rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ s.aligned_cf + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

# Not included in Appendix C: Variations on Model 1.4 and Model 1.6 Fixed-effects model with Multiple Imputation -- time-varying partisan identities -- 90% threshold

rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf85 + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])



rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ s.aligned_cf85 + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])


rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned_cf95 + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])


rm.amelia.out <- lapply(a.out_q_varying$imputations, function(i) plm(rev.perlobbyist.2015 ~ s.aligned_cf95 + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")), model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects<- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

##### Appendix D.  Variation in Model Specification to Evaluate the Possible Effects of Multicollinearity on the Second Set of Models (2.2 and 2.5)
# Variations on Model 2.2 First-differences model -- time-fixed partisan identities -- 90% threshold
set.seed(12345)

fdmodel_90 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf + num.clients + client.div.index, data = firm_panel_fixed, model="fd")
coeffd <- coeftest(fdmodel_90, vcovHC(fdmodel_90, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_90)
coeffd

fdmodel_90 <- plm(rev.perlobbyist.2015 ~ s.aligned_cf + num.clients + client.div.index, data = firm_panel_fixed, model="fd")
coeffd <- coeftest(fdmodel_90, vcovHC(fdmodel_90, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_90)
coeffd

# Variations on Model 2.5 First-differences model -- time-varying partisan identities -- 90% threshold

fdmodel_90 <- plm(rev.perlobbyist.2015 ~ h.aligned_cf + num.clients + client.div.index, data = firm_panel, model="fd")
coeffd <- coeftest(fdmodel_90, vcovHC(fdmodel_90, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_90)
coeffd

fdmodel_90 <- plm(rev.perlobbyist.2015 ~ s.aligned_cf + num.clients + client.div.index, data = firm_panel, model="fd")
coeffd <- coeftest(fdmodel_90, vcovHC(fdmodel_90, type="HC3",method="arellano", cluster="group"))
summary(fdmodel_90)
coeffd

sink()

##### Appendix E.  Models using Partisanship of Founders to Measure Firm Identity
# Model 6.1
set.seed(12345)

firm_quarterly_fixed_impute2 <- firm_quarterly_fixed %>% dplyr::select(rev.perlobbyist.2015,registrant,year,num.lobbyists,num.clients,law.firm.dum,
                                                                      international.offices.dum,num.domestic.offices,age,h.aligned,s.aligned,year_q_int,industry.diversity,issue.diversity, client.div.index, Dfound ,Rfound)
col_nums <- c(6,7,8,9,16,17)
lowers <- c(0,0,1,0,0,0)
uppers <- c(1,1,83,180,1,1)

constraint_mat <-matrix(c(col_nums,lowers,uppers),nrow=length(col_nums))
firm_quarterly_fixed_impute2$year_q_int <- as.numeric(as.character(firm_quarterly_fixed_impute2$year_q_int))

a.out_q_fixed2 <- amelia(as.data.frame(firm_quarterly_fixed_impute2), m=100,ts="year_q_int", cs="registrant", p2s = 2, idvars = c("client.div.index"), logs = c("num.lobbyists","num.clients","industry.diversity","issue.diversity","num.domestic.offices","age"), bounds = constraint_mat)

pdf('QuarterlyFixedPanel_Imputation2.pdf')
plot(a.out_q_fixed2, which.vars = c(6,7,8,9,16,17))
dev.off()
pdf('QuarterlyFixedPanel_Dispersion2.pdf')
disperse(a.out_q_fixed2, dims = 1, m = 100)
dev.off()
summary(a.out_q_fixed2)

rm.amelia.out <- lapply(a.out_q_fixed$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned + s.aligned + num.clients + client.div.index + law.firm.dum + international.offices.dum + num.domestic.offices  + age + as.factor(year_q_int), data = pdata.frame(i,index = c("registrant", "year_q_int")),  model="random"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

lintests <- lapply(rm.amelia.out, function(i) linearHypothesis(i, "h.aligned =  s.aligned",  white.adjust = "hc3", test = "F"))
lintestF <- mean(do.call(rbind, lapply(lintests, function(i) i$F))[,2])

lintestpval <- mean(do.call(rbind, lapply(lintests, function(i) i$`Pr(>F)`))[,2])

lintests[[1]]

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects <- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

# Model 6.2
firm_quarterly_varying_impute2 <- firm_quarterly %>% dplyr::select(rev.perlobbyist.2015,registrant,year,num.lobbyists,num.clients,law.firm.dum,
                                                                       international.offices.dum,num.domestic.offices,age,h.aligned,s.aligned,year_q_int,industry.diversity,issue.diversity, client.div.index, Dfound ,Rfound)
col_nums <- c(6,7,8,9,16,17)
lowers <- c(0,0,1,0,0,0)
uppers <- c(1,1,83,180,1,1)

constraint_mat <-matrix(c(col_nums,lowers,uppers),nrow=length(col_nums))
firm_quarterly_varying_impute2$year_q_int <- as.numeric(as.character(firm_quarterly_varying_impute2$year_q_int))

a.out_q_varying2 <- amelia(as.data.frame(firm_quarterly_varying_impute2), m=100,ts="year_q_int", cs="registrant", p2s = 2, idvars = c("client.div.index"), logs = c("num.lobbyists","num.clients","industry.diversity","issue.diversity","num.domestic.offices","age"), bounds = constraint_mat)

pdf('QuarterlyVaryingPanel_Imputation2.pdf')
plot(a.out_q_varying2, which.vars = c(6,7,8,9,16,17))
dev.off()
pdf('QuarterlyVaryingPanel_Dispersion2.pdf')
disperse(a.out_q_varying2, dims = 1, m = 100)
dev.off()
summary(a.out_q_varying2)

rm.amelia.out <- lapply(a.out_q_varying2$imputations, function(i) plm(rev.perlobbyist.2015 ~ h.aligned + s.aligned + num.clients + client.div.index, data = pdata.frame(i,index = c("registrant", "year_q_int")),  model="random", effect="twoways"))
coeftests.amelia <- lapply(rm.amelia.out, function(i) coeftest(i, vcovHC(i, type="HC3", method = "arellano", cluster="group")))

coefnames <- rownames(coeftests.amelia[[1]])

lintests <- lapply(rm.amelia.out, function(i) linearHypothesis(i, "h.aligned =  s.aligned",  white.adjust = "hc3", test = "F"))
lintestF <- mean(do.call(rbind, lapply(lintests, function(i) i$F))[,2])

lintestpval <- mean(do.call(rbind, lapply(lintests, function(i) i$`Pr(>F)`))[,2])

lintests[[1]]

coefs.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[,1]))
ses.amelia <- do.call(rbind, lapply(coeftests.amelia, function(i) i[, 2]))
random_effects <- mi.meld(coefs.amelia, ses.amelia)

results.mi.re <- cbind(coefnames, as.vector(random_effects$q.mi), as.vector(random_effects$se.mi))
results.mi.re

adjrsq.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$r.squared[2]))
mean(adjrsq.amelia)

fstat.amelia <- do.call(rbind, lapply(rm.amelia.out[1:100], function(i) summary(i)$fstatistic$statistic))
mean(fstat.amelia)

summary(rm.amelia.out[[1]])

sink()
##### Appendix F.  Analyis of Firm Switching by Lobbyists
set.seed(12345)
cf_indiv_totals <- cf_indiv %>% group_by(contributor_ext_id) %>% summarise(D_contribs = sum(amount[party == "D"]), R_contribs = sum(amount[party == "R"]), D_share = D_contribs/(D_contribs + R_contribs))
cf_indiv_totals$D_share[cf_indiv_totals$D_share > 1] <- NA
cf_indiv_totals$D_share[cf_indiv_totals$D_share < 0] <- NA
cf_indiv_totals$Dlobbyist90 <- 0
cf_indiv_totals$Dlobbyist90[cf_indiv_totals$D_share >=.9] <- 1
cf_indiv_totals$Rlobbyist90 <- 0
cf_indiv_totals$Rlobbyist90[cf_indiv_totals$D_share <=.1] <- 1
cf_indiv_totals$Blobbyist90 <- 0
cf_indiv_totals$Blobbyist90[cf_indiv_totals$D_share >.1 & cf_indiv_totals$D_share <.9] <- 1

summary(cf_indiv_totals)


firm_lobbyists <- firm_lobbyists %>% filter(!is.na(contributor_ext_id), Year >2007 & Year <2017) %>% mutate(Registrant = trimws(toupper(Registrant)))
firm_lobbyists <- firm_lobbyists %>% filter(Registrant %in% firm_quarterly$registrant)
firm_lobbyists$prior.year <- firm_lobbyists$Year -1


firm_lobbyists <- firm_lobbyists %>% filter(Registrant %in% firm_quarterly_fixed$registrant)
###Number of firm*lobbyist observations##
nrow(firm_lobbyists)

firm_lobbyists2 <- firm_lobbyists %>% left_join(dplyr::select(firm_lobbyists,Registrant, contributor_ext_id, Year), by = c("contributor_ext_id" = "contributor_ext_id", "prior.year" = "Year"))



firm_lobbyists2 <- firm_lobbyists2 %>% left_join(dplyr::select(cf_indiv_totals, contributor_ext_id, Dlobbyist90, Rlobbyist90,Blobbyist90))

firm_lobbyists2$lobbyist_party <- "Not known"
firm_lobbyists2$lobbyist_party[firm_lobbyists2$Dlobbyist90 == 1] <- "Democrat"
firm_lobbyists2$lobbyist_party[firm_lobbyists2$Rlobbyist90 == 1] <- "Republican"
firm_lobbyists2$lobbyist_party[firm_lobbyists2$Blobbyist90 == 1] <- "Bipartisan"


fixedfirmident$firm_type <- "Uncategorized"
fixedfirmident$firm_type[fixedfirmident$D_share>.9] <- "Democratic Firm"
fixedfirmident$firm_type[fixedfirmident$D_share<.1] <- "Republican Firm"
fixedfirmident$firm_type[fixedfirmident$D_share >=.1 & fixedfirmident$D_share <= .9] <- "Bipartisan Firm"

firm_lobbyists2 <- firm_lobbyists2 %>% left_join(dplyr::select(fixedfirmident, Registrant, firm_type), by = c("Registrant.x" = "Registrant"))
firm_lobbyists2 <- firm_lobbyists2 %>% left_join(dplyr::select(fixedfirmident, Registrant, firm_type), by = c("Registrant.y" = "Registrant"))




party_switchers <- firm_lobbyists2 %>% group_by(Year,lobbyist_party) %>% summarise(num_changers = n())
switchers <- firm_lobbyists2 %>% filter(Registrant.x != Registrant.y)

####Number of lobbyists that switch firms ####
nrow(switchers)


partisan_switchers <- switchers %>% filter( Dlobbyist90 == 1 | Rlobbyist90 == 1)


partisan_rev_origin <- table(partisan_switchers$lobbyist_party, partisan_switchers$firm_type.x)
#### partisan lobbyists leaving opposite party firm
partisan_rev_origin[1,3] + partisan_rev_origin[2,2]

###
opp_party_switch <- partisan_switchers %>% filter(firm_type.x == "Republican Firm" & lobbyist_party == "Democrat" | firm_type.x == "Democratic Firm" & lobbyist_party == "Republican")

#Origin and destination of partisan lobbyists leaving out party firms
table(opp_party_switch$firm_type.x, opp_party_switch$firm_type.y)

partisan_rev_destination <- table(partisan_switchers$lobbyist_party, partisan_switchers$firm_type.y)
#### partisan lobbyists joining opposite party firm
partisan_rev_destination[1,3] + partisan_rev_destination[2,2]




###Origin and Destination Firms of all partisan lobbyists
t <- table(partisan_switchers$firm_type.x, partisan_switchers$firm_type.y)
#percent in switching between same firm type
sum(diag(t))/sum(t)

#table and chisq
t
chisq.test(t)
sink()
##### Appendix H.  Difference-in-Differences Estimation

# See above under Figure 3 and Figure 4

sink() #"Appendixes.txt")